import requests
from lxml import etree
import pymongo

# Scrape the famous-verse listing on gushiwen.cn and store the results in a
# local MongoDB database, using one collection per link found in the
# "type2" sidebar of the index page (the link text is the collection name).
client = pymongo.MongoClient('localhost', 27017)
db = client.get_database('古诗文')
url = 'https://www.gushiwen.cn/mingjus/'

# Seconds to wait for the server before giving up; without a timeout
# requests can block forever on a stalled connection.
REQUEST_TIMEOUT = 10


def _first_text(node, path):
    """Return the stripped first result of XPath *path* on *node*, or ''."""
    found = node.xpath(path)
    return found[0].strip() if found else ''


res = requests.get(url, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of parsing an error page and
# reporting success with nothing inserted.
res.raise_for_status()
tree = etree.HTML(res.text)
items = tree.xpath('//div[@id="type2"]/div[@class="sright"]/a')

for item in items:
    # The link text names the target collection. Skip links without usable
    # text or without an href *before* fetching anything, since neither
    # can produce a valid insert.
    db_name = (item.text or '').strip()
    href = item.get('href')
    if not db_name or not href:
        continue

    poet_url = 'https://www.gushiwen.cn' + href
    poet_res = requests.get(poet_url, timeout=REQUEST_TIMEOUT)
    poet_res.raise_for_status()
    poet_tree = etree.HTML(poet_res.text)
    poet_items = poet_tree.xpath('//body/div[@class="main3"]/div[@class="left"]/div[@class="sons"]/div[@class="cont"]')

    # Each "cont" div holds: first <a> = verse, <span> = separator text,
    # second <a> = poet/source. Missing parts are stored as ''.
    docs = [
        {
            'verse': _first_text(cont, './/a[1]/text()'),
            'underline': _first_text(cont, './/span/text()'),
            'poet': _first_text(cont, './/a[2]/text()'),
        }
        for cont in poet_items
    ]
    # One batched write per page instead of one round-trip per verse;
    # insert_many rejects an empty list, so guard against empty pages.
    if docs:
        db[db_name].insert_many(docs)
print("数据插入成功！！！")







